# suppress console output -------------------------------------------------

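#' Suppress console output from external packages
#'
#' \code{quiet()} evaluates its argument while console output (e.g. from
#' \code{cat()}) is diverted to a temporary file.
#'
#' @param x Expression to evaluate. It can be of any kind.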

quiet <- function(x) {
  # Evaluate `x` while console output is diverted to a throwaway file and
  # return its value invisibly. `x` is a lazily evaluated argument, so it only
  # runs at force() below, i.e. after the sink is in place.
  sink_file <- tempfile()
  sink(sink_file)
  # restore the previous output target and delete the throwaway file, even
  # when evaluating `x` fails
  on.exit(
    {
      sink()
      unlink(sink_file)
    },
    add = TRUE
  )
  invisible(force(x))
}



# RDD with cluster variable -----------------------------------------------

rdd_model <- function(data, outcome, clustervar, running.var) {

  # Fit an rdrobust model of `outcome` on `running.var` at cutoff 0, passing
  # the dummy columns generated from `clustervar` as covariates.
  #
  # data:        data frame holding all referenced columns
  # outcome:     name of the outcome column (string)
  # clustervar:  name of the column expanded into covariate columns (string)
  # running.var: name of the running-variable column (string)
  #
  # Returns the object produced by rdrobust::rdrobust().

  # cluster variable --------------------------------------------------------
  # model formula
  ff <- as.formula(paste0(outcome, "~", clustervar))
  # model frame; model.frame() drops rows with missing values by default,
  # whereas y and x below are pulled from the full data. na.pass keeps one
  # row per observation so the covariates stay aligned with y and x.
  mf <- model.frame(formula = ff, data = data, na.action = na.pass)
  # design matrix without the intercept column (drop = FALSE keeps a matrix
  # even when a single column remains)
  design_matrix <- model.matrix(ff, mf)[, -1, drop = FALSE]
  # drop columns without variation
  has_variation <- apply(design_matrix, 2, function(x) length(table(x))) > 1
  design_matrix <- design_matrix[, has_variation, drop = FALSE]

  # model -------------------------------------------------------------------
  m <- suppressMessages(
    suppressWarnings(
      rdrobust::rdrobust(
        # the outcome
        y = dplyr::pull(.data = data, var = outcome),
        # the running variable
        x = dplyr::pull(.data = data, var = running.var),
        # the cutoff
        c = 0,
        # the cluster variable as a fixed effect
        covs = design_matrix,
        # variable bandwidths either side of the cutoff
        bwselect = "certwo",
        # uniform kernel
        kernel = "uniform",
        # report all estimates
        all = TRUE
      )
    )
  )
  return(m)
}



# rdd plot ----------------------------------------------------------------

rdd_plot <- function(data, model, outcome, clustervar, running.var, title) {

  # Draw an rdrobust::rdplot() of `outcome` against `running.var` at cutoff 0
  # and restyle the resulting ggplot.
  #
  # data:        data frame holding all referenced columns
  # model:       object with a `bws` matrix; its "b" row ("left" and "right"
  #              columns) supplies the bandwidths used for the plot
  # outcome:     name of the outcome column (string)
  # clustervar:  name of the column expanded into covariate columns (string)
  # running.var: name of the running-variable column (string)
  # title:       plot title
  #
  # Returns the rdplot() result with its `rdplot` element re-themed.

  # cluster variable --------------------------------------------------------
  # model formula
  ff <- as.formula(paste0(outcome, "~", clustervar))
  # model frame; model.frame() drops rows with missing values by default,
  # whereas x and y below are pulled from the full data. na.pass keeps one
  # row per observation so the covariates stay aligned with x and y.
  mf <- model.frame(formula = ff, data = data, na.action = na.pass)
  # design matrix without the intercept column (drop = FALSE keeps a matrix
  # even when a single column remains)
  design_matrix <- model.matrix(ff, mf)[, -1, drop = FALSE]
  # drop columns without variation
  has_variation <- apply(design_matrix, 2, function(x) length(table(x))) > 1
  design_matrix <- design_matrix[, has_variation, drop = FALSE]

  # plot --------------------------------------------------------------------

  # quiet() hides the console output of rdplot() and passes its value through
  p <- quiet(
    rdrobust::rdplot(
      # running variable
      x = dplyr::pull(.data = data, var = running.var),
      #  outcome
      y = dplyr::pull(.data = data, var = outcome),
      # cutoff
      c = 0,
      # no polynomial order is passed, so the rdplot() default applies
      # uniform kernel
      kernel = "uniform",
      # bandwidths
      h = c(model$bws["b", "left"], model$bws["b", "right"]),
      # the cluster variable as a fixed effect
      covs = design_matrix,
      x.label = "Interview date (cutoff = 0)",
      y.label = "Satisfaction with Democracy (normalized)",
      x.lim = c(-20, 70),
      y.lim = c(-1, 1),
      title = title,
      col.dots = "darkgray",
      col.lines = "black"
    )
  )

  # plot theme
  p$rdplot <- p$rdplot +
    ggplot2::theme(
      plot.title = ggplot2::element_text(size = 13, family = "mono"),
      axis.title.x = ggplot2::element_text(
        vjust = -1, size = 10, family = "mono"
      ),
      axis.title.y = ggplot2::element_text(
        vjust = +2, size = 10, family = "mono"
      ),
      axis.text.y = ggplot2::element_text(
        size = 9, face = "plain", color = "black"
      ),
      axis.text.x = ggplot2::element_text(
        size = 9, face = "plain", color = "black"
      ),
      panel.background = ggplot2::element_rect(
        fill = "white", color = "black", linetype = "solid"
      ),
      plot.background = ggplot2::element_rect(fill = "white"),
      panel.grid = ggplot2::element_blank(),
      panel.border = ggplot2::element_rect(
        color = "black", fill = NA, linewidth = 1
      ),
      panel.spacing.x = grid::unit(x = 3, units = "cm"),
      panel.spacing.y = grid::unit(x = 3, units = "cm"),
      strip.background = ggplot2::element_rect(
        color = "black", fill = "gray"
      ),
      plot.margin = grid::unit(c(1, 1, 1, 1), "lines")
    )

  return(p)

}


# permutation_analysis ----------------------------------------------------

permutation_analysis <- function(
  data, min_day = -30, max_day = 30, winner = TRUE, verbose = TRUE,
  nsims = 10000, seed = 123
) {

  # Placebo (permutation) test for the pre/post government-formation
  # difference in `swd`.
  #
  # data:     data frame with the columns `swd`, `relative_date` and
  #           `election`
  # min_day:  lower bound of the window around a cutoff (in days)
  # max_day:  upper bound of the window around a cutoff (in days)
  # winner:   if TRUE the p-value is the share of placebo estimates >= the
  #           actual estimate, otherwise the share <= the actual estimate
  # verbose:  show a text progress bar
  # nsims:    number of placebo cutoffs to draw
  # seed:     value passed to set.seed(); note that this resets the RNG state
  #           of the calling session
  #
  # Returns a list with `actual_estimate`, `sampling_distribution` (one row
  # per simulation: `placebo_cutoff`, `beta`) and `pvalue`.

  # initial binding of globals
  relative_date <- NULL

  # binary variable which captures whether the government was formed or not
  data <- dplyr::mutate(.data = data, gov_formed = ifelse(
    test = relative_date > 0, yes = 1, no = 0
  ))

  # Average difference in swd before and after gov formation
  pre_post <- lm(
    formula = swd ~ gov_formed + election,
    data = data,
    subset = relative_date >= min_day & relative_date <= max_day
  )

  # Instead of post-government formation date use a randomly-chosen time, then
  # estimate pre v post and compare difference to the placebo distribution

  # simulation --------------------------------------------------------------
  set.seed(seed)

  # create progress bar (min = 0 so that a single simulation is also valid)
  if (verbose) {
    pb <- utils::txtProgressBar(min = 0, max = nsims, style = 3)
  }

  # loop over simulation iterations
  df_betas <- lapply(X = seq_len(nsims), FUN = function(i) {

    # draw a random cutoff date
    # NOTE(review): as.integer() truncates toward zero, so negative draws are
    # rounded up and 0 is drawn about twice as often as other days; left
    # unchanged to keep previously reported results reproducible.
    cutoff <- as.integer(runif(n = 1, min = min_day, max = max_day + 1))

    # generate placebo government formation date
    data <- dplyr::mutate(.data = data, placebo_gov_formed = ifelse(
      test = relative_date > cutoff, yes = 1, no = 0
    ))

    # mean difference in swd
    m <- lm(
      formula = swd ~ placebo_gov_formed + election,
      data = data,
      subset = relative_date >= (cutoff + min_day) &
        relative_date <= (cutoff + max_day)
    )

    # update progress bar
    if (verbose) {
      utils::setTxtProgressBar(pb = pb, value = i)
    }

    # store results
    tibble::tibble(
      placebo_cutoff = cutoff,
      beta = coefficients(m)["placebo_gov_formed"]
    )
  })

  # close progress bar
  if (verbose) {
    close(pb)
  }

  # row-bind the list elements; the list is bound exactly once so that every
  # simulation contributes a single row
  df_betas <- dplyr::bind_rows(df_betas)

  # estimated effect at the actual cutoff
  actual_est <- coefficients(pre_post)["gov_formed"]

  # one-sided p-value
  if (winner) {
    pv <- mean(df_betas[["beta"]] >= actual_est)
  } else {
    pv <- mean(df_betas[["beta"]] <= actual_est)
  }

  return(list(
    actual_estimate = actual_est,
    sampling_distribution = df_betas,
    pvalue = pv
  ))
}



# permutation plot --------------------------------------------------------

permutation_plot <- function(permutation.out, title) {

  # Density plot of the placebo estimates with the observed estimate marked by
  # a dashed vertical line and an annotated arrow.
  #
  # permutation.out: list with `sampling_distribution` (data frame with a
  #                  `beta` column), `actual_estimate` and `pvalue`
  # title:           plot title
  #
  # Returns a ggplot object.

  # y position of the plot annotation: integer part of the peak density
  # minus one
  # NOTE(review): when the peak density is below 1 this evaluates to -1, which
  # places the annotation below the x-axis.
  y_pos_anno <- as.integer(
    x = max(density(permutation.out$sampling_distribution$beta)[["y"]])
  ) - 1L

  # permutation analysis plot
  p <- suppressWarnings(
    ggplot2::ggplot(
      data = permutation.out$sampling_distribution,
      mapping = ggplot2::aes(x = beta)
    ) +
      ggplot2::geom_density(color = "black", fill = "gray") +
      # observed estimate
      ggplot2::geom_vline(
        xintercept = permutation.out$actual_estimate,
        linetype = "dashed", linewidth = 1
      ) +
      # arrow pointing at the observed estimate
      ggplot2::annotate(
        "segment",
        x = permutation.out$actual_estimate + 0.05,
        xend = permutation.out$actual_estimate + 0.005,
        y = y_pos_anno,
        yend = y_pos_anno,
        arrow = ggplot2::arrow(length = grid::unit(2, "mm"))
      ) +
      ggplot2::annotate(
        geom = "text", x = permutation.out$actual_estimate + .06,
        y = y_pos_anno, label = "Observed difference", hjust = "left", size = 3
      ) +
      # p-value, printed just below the label
      ggplot2::annotate(
        geom = "text",
        x = permutation.out$actual_estimate + .06,
        y = y_pos_anno - .3,
        label = paste0("(p-value=", round(permutation.out$pvalue, 2), ")"),
        hjust = "left", size = 3
      ) +
      ggplot2::labs(
        title = title,
        x = "Satisfaction with Democracy (normalized)",
        y = "Density"
      ) +
      ggplot2::scale_x_continuous(
        labels = seq(from = -.2, to = .6, by = .2),
        breaks = seq(from = -.2, to = .6, by = .2)
      ) +
      #coord_cartesian(xlim = c(-0.15, 0.6)) +
      ggplot2::theme(
        plot.title = ggplot2::element_text(
          size = 13, family = "mono", face = "plain"
        ),
        axis.title.x = ggplot2::element_text(
          vjust = -1, size = 10, family = "mono", face = "plain"
        ),
        axis.title.y = ggplot2::element_text(
          vjust = +2, size = 10, family = "mono", face = "plain"
        ),
        axis.text.y = ggplot2::element_text(
          size = 9, face = "plain", color = "black"
        ),
        axis.text.x = ggplot2::element_text(
          size = 9, face = "plain", color = "black"
        ),
        panel.background = ggplot2::element_rect(
          fill = "white", color = "black", linetype = "solid"
        ),
        plot.background = ggplot2::element_rect(fill = "white"),
        panel.grid = ggplot2::element_blank(),
        panel.grid.major.y = ggplot2::element_blank(),
        panel.grid.minor.y = ggplot2::element_blank(),
        panel.border = ggplot2::element_rect(
          color = "black", fill = NA, linewidth = 1
        ),
        panel.spacing.x = grid::unit(x = 3, units = "cm"),
        panel.spacing.y = grid::unit(x = 3, units = "cm"),
        strip.background = ggplot2::element_rect(
          color = "black", fill = "gray"
        ),
        plot.margin = grid::unit(c(1, 1, 1, 1), "lines")
      )
  )

  return(p)

}


# RDD estimates with varying cutoffs --------------------------------------

var_cutoffs <- function(
  data, outcome, running.var, clustervar = NULL,
  filename, plot_title, cutoffs = NULL,
  title.offset = 0, subtitle.offset = 0,
  caption = "Note: Robust RDD estimates with 95% confidence intervals"
) {

  # Re-estimate the RDD at a sequence of cutoffs and plot the robust estimate
  # with its confidence interval for every cutoff.
  #
  # data:            data frame holding all referenced columns
  # outcome:         name of the outcome column (string)
  # running.var:     name of the running-variable column (string)
  # clustervar:      optional name of a column expanded into covariate columns
  # filename:        currently not used by this function
  # plot_title:      text shown as the plot subtitle
  # cutoffs:         optional c(from, to); defaults to -30 to 30, step 1
  # title.offset:    hjust of the plot title
  # subtitle.offset: hjust of the plot subtitle
  # caption:         currently not used by this function
  #
  # Returns a ggplot object. Cutoffs for which rdrobust() fails are omitted.

  # import pipe
  `%>%` <- dplyr::`%>%`
  lb <- ub <- NULL

  # sequence of cut-off dates
  if (is.null(cutoffs)) {
    cutoff_seq <- seq(from = -30, to = 30, by = 1)
  } else {
    cutoff_seq <- seq(from = cutoffs[1], to = cutoffs[2], by = 1)
  }

  # cluster variable
  if (!is.null(clustervar)) {
    # model formula
    ff <- as.formula(paste0(outcome, "~", clustervar))
    # model frame; model.frame() drops rows with missing values by default,
    # whereas y and x below are pulled from the full data. na.pass keeps one
    # row per observation so the covariates stay aligned with y and x.
    mf <- model.frame(formula = ff, data = data, na.action = na.pass)
    # design matrix without the intercept column (drop = FALSE keeps a matrix
    # even when a single column remains)
    design_matrix <- model.matrix(ff, mf)[, -1, drop = FALSE]
    # drop columns without variation
    has_variation <- apply(
      design_matrix, 2, function(x) length(table(x))
    ) > 1
    design_matrix <- design_matrix[, has_variation, drop = FALSE]
  } else {
    design_matrix <- NULL
  }

  # vary the cutoff
  outcomes <- lapply(X = cutoff_seq, FUN = function(cutoff_value) {

    # attempt to run RDD
    m <- try(
      expr = suppressWarnings(
        rdrobust::rdrobust(
          # the outcome
          y = dplyr::pull(.data = data, var = outcome),
          # the running variable
          x = dplyr::pull(.data = data, var = running.var),
          # the cutoff
          c = cutoff_value,
          # the cluster variable as a fixed effect
          covs = design_matrix,
          # variable bandwidths either side of the cutoff
          bwselect = "certwo",
          # uniform kernel
          kernel = "uniform",
          # report all estimates
          all = TRUE
        )
      ),
      silent = TRUE
    )

    # failed fit: placeholder row, filtered out further below
    if (inherits(m, "try-error")) {
      return(tibble::tibble(
        estimate = NA_real_,
        lb = NA_real_,
        ub = NA_real_,
        cutoff = cutoff_value
      ))
    }

    # robust estimate and confidence interval
    tibble::tibble(
      estimate = m$coef["Robust", ],
      lb = m$ci["Robust", "CI Lower"],
      ub = m$ci["Robust", "CI Upper"],
      cutoff = cutoff_value
    )
  })

  # combine results; the list is bound exactly once so that every cutoff
  # contributes a single row
  res <- dplyr::bind_rows(outcomes) %>%
    # an estimate counts as insignificant when its interval covers zero
    dplyr::mutate(`RDD Estimate:` = ifelse(
      test = lb < 0 & ub > 0, yes = 0, no = 1
    )) %>%
    dplyr::mutate(
      `RDD Estimate:` = factor(
        `RDD Estimate:`,
        levels = c(0, 1),
        labels = c("insignificant", "significant")
      )
    ) %>%
    dplyr::filter(!is.na(`RDD Estimate:`))

  # x-axis limits
  x_lower <- min(cutoff_seq)
  x_upper <- max(cutoff_seq)

  # plot theme; note that theme_set() changes the default theme of the whole
  # session, not only of this plot
  ggplot2::theme_set(new = ggthemes::theme_wsj(base_size = 9))

  # plot
  var_cutoff_plot <- ggplot2::ggplot(
    data = res,
    mapping = ggplot2::aes(x = cutoff, ymin = lb, ymax = ub)
  ) +
    ggplot2::geom_vline(xintercept = 0, lty = "dashed") +
    ggplot2::geom_hline(yintercept = 0, lty = "dashed", color = "red") +
    ggplot2::geom_linerange(
      ggplot2::aes(color = `RDD Estimate:`), linewidth = 1
    ) +
    ggplot2::scale_color_brewer(palette = "Dark2") +
    ggplot2::geom_point(ggplot2::aes(x = cutoff, y = estimate)) +
    ggplot2::scale_x_continuous(
      limits = c(x_lower, x_upper),
      labels = seq(from = x_lower, to = x_upper, by = 5),
      breaks = seq(from = x_lower, to = x_upper, by = 5)
    ) +
    ggplot2::scale_y_continuous(
      labels = round(seq(from = -2.5, to = 2.5, by = 0.5), digits = 1),
      breaks = round(seq(from = -2.5, to = 2.5, by = 0.5), digits = 1)
    ) +
    ggplot2::coord_cartesian(ylim = c(-2.5, 2.5)) +
    ggplot2::labs(
      x = "Cut-off date relative to actual government formation",
      y = "Effect Size",
      subtitle = plot_title
    ) + #"Doughnut RDD with Varying Cut-off Dates on the x-axis",
    ggplot2::theme(
      axis.title.x = ggplot2::element_text(vjust = -1, size = 11),
      axis.title.y = ggplot2::element_text(vjust = 1, size = 11),
      axis.text.y = ggplot2::element_text(size = 9, face = "plain"),
      axis.text.x = ggplot2::element_text(size = 9, face = "plain"),
      plot.title = ggplot2::element_text(hjust = title.offset, size = 15),
      plot.subtitle = ggplot2::element_text(hjust = subtitle.offset, size = 11),
      plot.caption = ggplot2::element_text(size = 10, vjust = -3),
      plot.caption.position = "plot",
      panel.background = ggplot2::element_rect(fill = "white"),
      plot.background = ggplot2::element_rect(fill = "white"),
      legend.background = ggplot2::element_rect(
        fill = "white", color = "white"
      ),
      legend.key = ggplot2::element_rect(fill = "white", color = "white"),
      legend.title = ggplot2::element_text(size = 9)
    )

  return(var_cutoff_plot)

}



# rdd power ---------------------------------------------------------------

est_rdd_power <- function(
  data, outcome, running.var, cluster = NULL,
  tau = 0.5, cutoff = 0
) {

  # Power calculation for an RD design: bandwidths are selected with
  # rdrobust::rdbwselect() and handed to rdpower::rdpower() as the
  # neighbourhood around the cutoff.
  #
  # data:        data frame holding all referenced columns
  # outcome:     name of the outcome column (string)
  # running.var: name of the running-variable column (string)
  # cluster:     optional name of the cluster column (string)
  # tau:         passed to rdpower() as `tau`
  # cutoff:      RD cutoff on the running variable
  #
  # Returns the object produced by rdpower::rdpower().

  # cluster identifiers; stays NULL when no cluster column was named
  cluster_ids <- NULL
  if (!is.null(cluster)) {
    cluster_ids <- dplyr::pull(.data = data, var = cluster)
  }

  # two-column matrix: outcome first, running variable second
  yx_columns <- rlang::syms(c(outcome, running.var))
  Y_X <- as.matrix(dplyr::select(.data = data, !!!yx_columns))

  # rdd bandwidth (console output silenced)
  bw <- quiet(rdrobust::rdbwselect(
    masspoints = "off",
    y = dplyr::pull(.data = data, var = outcome),
    x = dplyr::pull(.data = data, var = running.var),
    c = cutoff,
    bwselect = "certwo",
    kernel = "uniform",
    cluster = cluster_ids
  ))

  # left and right "b" bandwidths from the first row of the selector output
  power_window <- c(bw$bws[1, "b (left)"], bw$bws[1, "b (right)"])

  # rdd power (console output silenced)
  rdd_power <- quiet(rdpower::rdpower(
    kernel = "uniform",
    vce = "nn",
    data = Y_X,
    cutoff = cutoff,
    tau = tau,
    all = TRUE,
    samph = power_window,
    cluster = cluster_ids
  ))

  return(rdd_power)

}


# prep datasets -----------------------------------------------------------

# wrapper for country-specific functions
prepare_data <- function(filenames, abstainers = FALSE) {

  # Build the pooled analysis dataset from the Iceland, Norway and
  # Netherlands surveys.
  #
  # filenames:  list with the elements `iceland`, `norway` and `netherlands`,
  #             each handed to the matching prepare_*() function
  # abstainers: forwarded unchanged to the prepare_*() functions
  #
  # Returns the stacked data with recoded variables and the indicator columns
  # `gov_formed`, `lr_na`, `edu_na` and `hhinc_na`.

  # import pipe
  `%>%` <- dplyr::`%>%`
  election <- party <- swd <- elecdist <- NULL

  # country datasets, stacked in the order Iceland, Norway, Netherlands
  data <- dplyr::bind_rows(
    prepare_iceland(filename = filenames$iceland, abstainers = abstainers),
    prepare_norway(filename = filenames$norway, abstainers = abstainers),
    prepare_netherlands(
      filename = filenames$netherlands, abstainers = abstainers
    )
  )

  data <- data %>%
    # election and party as factor variables
    dplyr::mutate(
      election = as.factor(election),
      party = as.factor(party)
    ) %>%
    # observations missing on SWD are removed
    dplyr::filter(!is.na(swd)) %>%
    dplyr::mutate(
      # normalized satisfaction with democracy
      swd = as.numeric(scale(x = swd, center = TRUE, scale = TRUE)),
      # binary period variable: 1 after the government formation was
      # announced and winner-loser status is known, 0 before; rows with
      # relative_date equal to 0 match neither condition
      gov_formed = dplyr::case_when(
        relative_date < 0 ~ 0,
        relative_date > 0 ~ 1
      )
    )

  # left-right self placement: flag answers off the 0 to 10 scale first, then
  # set those answers to NA
  data <- data %>%
    dplyr::mutate(
      lr_na = dplyr::case_when(
        left_right_self > 10 ~ 1,
        left_right_self < 11 ~ 0
      ),
      left_right_self = replace(
        left_right_self, which(left_right_self > 10), NA_integer_
      )
    )

  # electoral district as a factor; Iceland and Norway districts receive a
  # country prefix, Netherlands districts are kept as they are
  data <- data %>%
    dplyr::mutate(
      elecdist = as.factor(dplyr::case_when(
        election == "Iceland 2017 general election" ~
          sprintf("ICE%s", elecdist),
        election == "Norway 1997 general election" ~
          sprintf("NOR%s", elecdist),
        election == "Netherlands 2012 general election" ~ elecdist
      ))
    )

  # education: flag values above 9 first, then set them to NA and convert the
  # variable to a factor
  data <- data %>%
    dplyr::mutate(
      edu_na = dplyr::case_when(
        education > 9 ~ 1,
        education < 10 ~ 0
      ),
      education = as.factor(
        replace(education, which(education > 9), NA_integer_)
      )
    )

  # household income: flag values above 10 first, then set them to NA and
  # convert the variable to a factor
  data <- data %>%
    dplyr::mutate(
      hhinc_na = dplyr::case_when(
        household_income > 10 ~ 1,
        household_income < 11 ~ 0
      ),
      household_income = as.factor(
        replace(household_income, which(household_income > 10), NA_integer_)
      )
    )

  return(data)
}

# prepare norway
prepare_norway <- function(filename, abstainers) {

  # Prepare the Norway 1997 respondents from the CSES data.
  #
  # filename:   path of an R data file that contains the object `cses1`
  # abstainers: FALSE keeps only respondents with a party-list vote code of
  #             10 or below; TRUE keeps every respondent except vote code 99
  #
  # Returns a tibble with the columns election, party, swd, swd_binary,
  # swd_ordinal, win_cabinet, relative_date, win_largest, win_junior,
  # labour_loser, age, education, household_income, left_right_self, female
  # and elecdist.

  # initial binding of globals
  cses1 <- A1004 <- A1026 <- A1027 <- A1028 <- A2030 <- A3001 <- NULL
  A2001 <- A2002 <- A2003 <- A2012 <- A2019 <- A3031 <- swd <- party <- NULL
  win_cabinet <- win_largest <- win_junior <- labour_loser <- gov_date <- NULL

  # import pipe
  `%>%` <- dplyr::`%>%`

  # load data ---------------------------------------------------------------
  # load() places the stored objects in this function's environment
  load(file = filename)
  if (is.null(cses1)) {
    stop("'", filename, "' does not contain an object named 'cses1'")
  }
  nor <- tibble::as_tibble(cses1)
  rm(cses1)

  # cleaning ----------------------------------------------------------------

  # A1026    >>> DATE QUESTIONNAIRE ADMINISTERED - MONTH
  # A1027    >>> DATE QUESTIONNAIRE ADMINISTERED - DAY
  # A1028    >>> DATE QUESTIONNAIRE ADMINISTERED - YEAR
  # A2030    >>> PARTY LIST VOTED FOR - DISTRICT
  # A3001    >>> SATISFACTION WITH DEMOCRATIC PROCESS

  # balance stats variables
  # A2001    >>> Age
  # A2002    >>> GENDER
  # A2003    >>> EDUCATION
  # A2012    >>> HOUSEHOLD INCOME
  # A2019    >>> REGION OF RESIDENCE
  # A3031    >>> LEFT-RIGHT - SELF

  # subset data
  nor <- nor %>%
    # keep Norway 1997 only
    dplyr::filter(A1004 == "NOR_1997") %>%
    # keep only relevant columns (see cses1_codebook_part2_variables.txt)
    dplyr::select(
      A1026, A1027, A1028, A2030, A3001,
      age = A2001,
      gender = A2002,
      education = A2003,
      household_income = A2012,
      elecdist = A2019,
      left_right_self = A3031
    )

  # by default abstainers are excluded, otherwise (robustness check) they are
  # included. Both filters also remove rows with a missing A2030.
  if (!abstainers) {
    # keep only those who cast a ballot
    nor <- dplyr::filter(.data = nor, A2030 <= 10)
  } else {
    # keep abstainers but exclude those who refused to answer
    nor <- dplyr::filter(.data = nor, A2030 != 99)
  }

  # generate variables ------------------------------------------------------

  # satisfaction with democracy: answer codes 5, 4, 2, 1 are mapped onto
  # 0, .33, .67, 1; any other code becomes NA
  swd_codes <- c(5, 4, 2, 1)
  swd_values <- c(0, .33, .67, 1)
  nor <- nor %>%
    dplyr::mutate(
      # 4 categories but coded as if continuous
      swd = swd_values[match(A3001, swd_codes)],
      # binary
      swd_binary = ifelse(test = swd > .5, yes = 1, no = 0),
      # ordinal
      swd_ordinal = factor(
        swd,
        levels = c(0, .33, .67, 1),
        labels = c(
          "not at all satisfied",
          "not very satisfied",
          "fairly satisfied",
          "very satisfied"
        )
      )
    )

  # A2030    >>> PARTY LIST VOTED FOR - DISTRICT
  #  ---------------------------------------------------------------------------
  #    >>> PARTIES AND LEADERS: NORWAY (1997)
  #  ---------------------------------------------------------------------------
  #
  #  01.          RED ELECTORAL ALLIANCE
  #  02. PARTY F  SOCIALIST LEFT PARTY           LEADER F  KRISTIN HALVORSEN
  #  03. PARTY A  LABOR PARTY                    LEADER A  THORBJORN JAGLAND
  #  04.          LIBERAL PARTY
  #  05. PARTY D  CHRISTIAN PEOPLE'S PARTY       LEADER D  KJELL MAGNE BONDEVIK
  #  06. PARTY E  CENTER PARTY                   LEADER E  ANN ENGER LAHNSTEIN
  #  08. PARTY C  CONSERVATIVE PARTY             LEADER C  JAN PETERSEN
  #  09. PARTY B  PROGRESS PARTY                 LEADER B  CARL IVAR HAGEN
  #  10.          OTHER PARTIES

  # Norway 1997 government consisted of:
  # Christian People's Party, Liberal Party and the Centre Party

  # party labels by A2030 code; codes without a label give NA
  party_codes <- c(1, 2, 3, 4, 5, 6, 8, 9, 10)
  party_names <- c(
    "Red Electoral Alliance (Norway)",
    "Socialist Left Party (Norway)",
    "Labour Party (Norway)",
    "Liberal Party (Norway)",
    "Christian People's Party (Norway)",
    "Centre Party (Norway)",
    "Conservative Party (Norway)",
    "Progress Party (Norway)",
    "Other Parties"
  )

  # A2030 has no missing values after the filter above, so %in% yields the
  # same 0/1 coding as element-wise comparisons
  nor <- nor %>%
    dplyr::mutate(
      # winner cabinet, no difference major/junior partner:
      # Liberal Party (4), Christian People's Party (5), Centre Party (6)
      win_cabinet = as.numeric(A2030 %in% c(4, 5, 6)),
      # party variable for party fixed effects
      party = party_names[match(A2030, party_codes)],
      # election variable for election fixed effects
      election = "Norway 1997 general election",
      # voted for the largest party/PM party: Christian People's Party (5)
      # (see Blais on junior vs. major coalition partners)
      win_largest = as.numeric(A2030 %in% 5),
      # voted for a junior coalition partner: Liberal Party (4) or
      # Centre Party (6)
      win_junior = as.numeric(A2030 %in% c(4, 6)),
      # Labour loser: Labour Party (3)
      labour_loser = as.numeric(A2030 %in% 3)
    )

  # Timeline
  # NOTE: The government announcement date was updated to 14 Oct 1997
  #       For details see paper.
  nor <- nor %>%
    dplyr::mutate(
      # government announcement date
      gov_date = as.Date(x = "14/10/1997", format = "%d/%m/%Y"),
      # interview date from the day (A1027), month (A1026) and year (A1028)
      interview_date = as.Date(
        x = paste0(A1027, "/", A1026, "/", A1028),
        format = "%d/%m/%Y"
      ),
      # Days between the interview and the government announcement: 0 on the
      # announcement day, positive for interviews held afterwards. This is the
      # orientation used everywhere else in this file (relative_date > 0 means
      # the government has been formed; Iceland uses interview day minus
      # cut-off day), hence interview date minus government date.
      relative_date = as.integer(interview_date - gov_date)
    )

  # gender variable 1 = male; 2 = female
  nor <- nor %>%
    dplyr::mutate(female = dplyr::case_when(
      gender == 1 ~ 0,
      gender == 2 ~ 1
    )) %>%
    dplyr::select(-gender)

  # coerce to character
  nor$elecdist <- as.character(nor$elecdist)

  # keep only relevant columns
  nor <- dplyr::select(
    .data = nor, election, party, swd, swd_binary, swd_ordinal, win_cabinet,
    relative_date, win_largest, win_junior, labour_loser, age, education,
    household_income, left_right_self, female, elecdist
  )

  return(nor)
}

# prepare Iceland
prepare_iceland <- function(filename, abstainers) {

  # import pipe
  `%>%` <- dplyr::`%>%`
  democr <- swd <- prtvote17 <- win_cabinet <- NULL
  win_largest <- party <- NULL

  # load data ---------------------------------------------------------------
  isl <- readstata13::read.dta13(
    file = filename,
    convert.factors = FALSE, generate.factors = FALSE
  ) %>%
    tibble::as_tibble()

  # generate variables ------------------------------------------------------

  # satisfaction with democracy
  #1. Very satisfied #2. Fairly satisfied #3. Not very satisfied
  #4. Not at all satisfied #7. Refuses to answer (volunteered)
  #8. Don’t know (volunteered) #9. Missing (registered)
  isl <- isl %>%
    dplyr::mutate(
      swd = NA_integer_,
      swd = ifelse(test = democr == 4, yes = 0, no = swd),
      swd = ifelse(test = democr == 3, yes = .33, no = swd),
      swd = ifelse(test = democr == 2, yes = .67, no = swd),
      swd = ifelse(test = democr == 1, yes = 1, no = swd)
    ) %>%
    # binary
    dplyr::mutate(swd_binary = ifelse(test = swd > .5, yes = 1, no = 0)) %>%
    # ordinal
    dplyr::mutate(
      swd_ordinal = factor(
        x = swd, levels = c(0, 0.33, 0.67, 1),
        labels = c(
          "not at all satisfied",
          "not very satisfied",
          "fairly satisfied",
          "very satisfied"
        )
      )
    )

  # winner cabinet, no difference major/junior partner
  isl <- isl %>%
    dplyr::mutate(
      win_cabinet = 0,
      # the left-green movement (the PM, even if she did not win most votes)
      win_cabinet = ifelse(test = prtvote17 == 4, yes = 1, no = win_cabinet),
      # the incumbent independence party
      win_cabinet = ifelse(test = prtvote17 == 3, yes = 1, no = win_cabinet),
      # the progressive party
      win_cabinet = ifelse(test = prtvote17 == 2, yes = 1, no = win_cabinet)
    )

  # Voted for the largest party/PM party (Voir blais sur coalition
  # partner junior v/s major)
  isl <- isl %>%
    dplyr::mutate(
      win_largest = 0,
      # Left-Green Movement
      win_largest = ifelse(test = prtvote17 == 3, yes = 1, no = win_largest)
    )

  # Voted for junior coalition partner
  isl <- isl %>%
    dplyr::mutate(
      win_junior = 0,
      # for the Independence Party
      win_junior = ifelse(test = prtvote17 == 4, yes = 1, no = win_junior),
      # for the Progressive Party
      win_junior = ifelse(test = prtvote17 == 2, yes = 1, no = win_junior)
    )

  # party affiliation for party fixed effects
  isl <- isl %>%
    dplyr::mutate(
      party = NA_character_,
      party = ifelse(
        test = prtvote17 == 1,
        yes = "Social Democratic Alliance (Iceland)",
        no = party
      ),
      party = ifelse(
        test = prtvote17 == 2,
        yes = "Progressive Party (Iceland)",
        no = party
      ),
      party = ifelse(
        test = prtvote17 == 3,
        yes = "Independence Party (Iceland)",
        no = party
      ),
      party = ifelse(
        test = prtvote17 == 4,
        yes = "Left-Green Movement (Iceland)",
        no = party
      ),
      party = ifelse(
        test = prtvote17 == 5,
        yes = "Bright Future (Iceland)",
        no = party
      ),
      party = ifelse(
        test = prtvote17 == 6,
        yes = "Pirate Party (Iceland)",
        no = party
      ),
      party = ifelse(
        test = prtvote17 == 7,
        yes = "Dawn (Iceland)",
        no = party
      ),
      party = ifelse(
        test = prtvote17 == 8,
        yes = "Reform Party (Iceland)",
        no = party
      ),
      party = ifelse(
        test = prtvote17 == 9,
        yes = "People's Front of Iceland (Iceland)",
        no = party
      ),
      party = ifelse(
        test = prtvote17 == 10,
        yes = "Centre Party (Iceland)",
        no = party
      ),
      party = ifelse(
        test = prtvote17 == 11,
        yes = "People's Party (Iceland)",
        no = party
      ),
      party = ifelse(
        test = prtvote17 == 89,
        yes = "Another party (Iceland)",
        no = party
      ),
      party = ifelse(
        test = prtvote17 == 90,
        yes = "Voted, does not want to say which party (Iceland)",
        no = party
      ),
      party = ifelse(
        test = prtvote17 == 92,
        yes = "Cast a blank ballot (Iceland)",
        no = party
      ),
      party = ifelse(
        test = prtvote17 == 93,
        yes = "Cast an invalid vote (Iceland)",
        no = party
      ),
      party = ifelse(
        test = prtvote17 == 96,
        yes = "Not applicable (Iceland)",
        no = party
      ),
      party = ifelse(
        test = prtvote17 == 97,
        yes = "Refuses to answer (Iceland)",
        no = party
      ),
      party = ifelse(
        test = prtvote17 == 98,
        yes = "Don't know (Iceland)",
        no = party
      ),
      party = ifelse(
        test = prtvote17 == 99,
        yes = "Missing (Iceland)",
        no = party
      )
    )

  # remove the following categories from the analysis
  # 1) "Voted, does not want to say which party (Iceland)"
  # 2) "Cast a blank ballot (Iceland)"
  # 3) "Cast an invalid vote (Iceland)"
  # 4) "Not applicable (Iceland)"
  # 5) "Refuses to answer (Iceland)"
  # 6) "Don't know (Iceland)"
  # 7) "Missing (Iceland)"
  if (!abstainers) {
    isl <- isl %>%
      dplyr::filter(
        party != "Voted, does not want to say which party (Iceland)",
        party != "Cast a blank ballot (Iceland)",
        party != "Cast an invalid vote (Iceland)",
        party != "Not applicable (Iceland)",
        party != "Refuses to answer (Iceland)",
        party != "Don't know (Iceland)",
        party != "Missing (Iceland)"
      )
  } else if (abstainers) {
    isl <- isl %>%
      dplyr::filter(
        party != "Voted, does not want to say which party (Iceland)",
        party != "Not applicable (Iceland)",
        party != "Refuses to answer (Iceland)",
        party != "Don't know (Iceland)",
        party != "Missing (Iceland)"
      )
  }

  # election label for election fixed effects
  isl <- isl %>%
    dplyr::mutate(election = "Iceland 2017 general election")

  # Date of the interview relative to the date of the government formation.
  # Based on intervnrday, which is the number of days after the election (held
  # Oct 28th). To locate the cut-off at Nov 28th, we subtract 31 from
  # intervnrday.
  isl <- isl %>%
    dplyr::mutate(relative_date = intervnrday - 31)

  # labour_loser variable is only defined for Norway
  isl <- dplyr::mutate(.data = isl, labour_loser = NA_integer_)

  # gender variable: "Karl" (male) -> female = 0; "Kona" (female) -> female = 1
  isl <- isl %>%
    dplyr::mutate(female = dplyr::case_when(
      gender == "Karl" ~ 0,
      gender == "Kona" ~ 1
    )) %>%
    dplyr::select(-gender)

  # coerce to character
  isl$elecdist <- as.character(isl$elecdist)

  # drop irrelevant columns
  isl <- dplyr::select(
    .data = isl, election, party, swd, swd_binary, swd_ordinal, win_cabinet,
    relative_date, win_largest, win_junior, labour_loser, female, elecdist,
    left_right_self = lrscale, age, household_income = incomecate, education
  )

  return(isl)
}


# prepare netherlands
prepare_netherlands <- function(filename, abstainers) {

  # import pipe
  `%>%` <- dplyr::`%>%`

  # load data ---------------------------------------------------------------

  # main dataset
  nld <- readr::read_csv(file = filename$main) %>%
    dplyr::filter(EventLabel == "NL 2012")

  # additional dataset
  nld_add <- readr::read_csv(file = filename$additional)

  # cleaning ----------------------------------------------------------------

  # rename variables
  nld <- nld %>%
    dplyr::rename(
      relative_date = TimeGap,
      swd = stfdem,
      win_cabinet = winner
    )

  if (!abstainers) {
    nld <- nld %>%
      dplyr::mutate(win_cabinet = dplyr::case_when(
        party_vote == 14 ~ 99,
        party_vote != 14 ~ win_cabinet
      ))
  } else if (abstainers) {
    # if abstainers should be included
    nld <- nld %>%
      dplyr::mutate(win_cabinet = dplyr::case_when(
        party_vote == 14 ~ 0,
        party_vote != 14 ~ win_cabinet
      ))
  }

  # 20 of September is the correct date of the government announcement
  nld <- nld %>%
    dplyr::mutate(
      relative_date = as.numeric(
        as.Date(i_date, "%Y-%m-%d") - as.Date("2012-09-20", "%Y-%m-%d")
      )
    )

  # recode win_cabinet code 99 to NA and drop missing values
  nld <- dplyr::mutate(
    .data = nld,
    win_cabinet = ifelse(
      test = win_cabinet == 99,
      yes = NA,
      no = win_cabinet
    )
  ) %>%
    dplyr::filter(!is.na(win_cabinet))

  # convert swd scale
  nld <- nld %>%
    dplyr::mutate(swd = swd / 10)

  # election for election fixed effects
  nld <- nld %>%
    dplyr::mutate(election = "Netherlands 2012 general election")

  # winners:
  # 1) Party for Freedom and Democracy (VVD) (Mark Rutte)
  # 3) Labour Party (Pvda)

  # party
  nld <- nld %>%
    dplyr::mutate(party = paste0(party_l.x, " (Netherlands)"))

  # generate win_largest (if voted for largest party) and
  # win_junior if voted for CDA
  nld <- nld %>%
    dplyr::mutate(
      win_largest = ifelse(
        test = party == "Party for Freedom and Democracy (Netherlands)",
        yes = 1,
        no = 0
      ),
      win_junior = ifelse(
        test = party == "Labour Party (Netherlands)",
        yes = 1,
        no = 0
      )
    )

  # generate variables that are defined only for Iceland and Norway
  nld <- nld %>%
    dplyr::mutate(
      swd_binary = NA_integer_,
      swd_ordinal = factor(
        x = NA, levels = c(0, 0.33, 0.67, 1),
        labels = c(
          "not at all satisfied",
          "not very satisfied",
          "fairly satisfied",
          "very satisfied"
        )
      ),
      labour_loser = NA_integer_
    )

  # gender variable 1 = male; 2 = female
  nld <- nld %>%
    dplyr::mutate(female = dplyr::case_when(
      gndr == 1 ~ 0,
      gndr == 2 ~ 1
    )) %>%
    dplyr::select(-gndr)

  # subset additional data
  nld_add <- nld_add %>%
    dplyr::select(idno, lrscale, hinctnta, region, regunit)

  # subset main data
  nld <- nld %>%
    dplyr::select(
      idno, election, party, swd, swd_binary, swd_ordinal, win_cabinet,
      relative_date, win_largest, win_junior, labour_loser, female, agea,
      edulvla
    )

  # join
  nld <- nld %>%
    dplyr::left_join(y = nld_add, by = "idno")

  # rename agea to age
  nld <- nld %>%
    dplyr::rename(age = agea)

  # rename edulvla to edu
  nld <- nld %>%
    dplyr::rename(education = edulvla)

  # rename lrscale to left_right_self
  nld <- nld %>%
    dplyr::rename(left_right_self = lrscale)

  # rename hinctnta to income
  nld <- nld %>%
    dplyr::rename(household_income = hinctnta)

  # drop irrelevant columns
  nld <- nld %>%
    dplyr::select(
      election, party, swd, swd_binary, swd_ordinal, win_cabinet, relative_date,
      win_largest, win_junior, labour_loser, female, age, education,
      left_right_self, household_income, elecdist = region
    )

  return(nld)
}



# balance statistics table ------------------------------------------------
balance_table <- function(dv, data, label) {

  # inital binding of globals
  gov_formed <- NULL
  `%>%` <- magrittr::`%>%`

  fn <- formula(paste0(dv, "~ gov_formed"))
  m1 <- summary(lm(fn, data = data))

  out <- cbind(
    label,
    round(
      mean(
        data %>%
          dplyr::filter(gov_formed == 0) %>%
          dplyr::pull(var = !!dv), na.rm = TRUE
      ), digits = 2
    ),
    round(
      mean(
        data %>%
          dplyr::filter(gov_formed == 1) %>%
          dplyr::pull(var = !!dv), na.rm = TRUE
      ), digits = 2
    ),
    round(m1$coefficients[2, "Estimate"], digits = 2),
    round(m1$coefficients[2, "Pr(>|t|)"], digits = 2)
  )

  return(out)
}
